Author: Amanda Everitt
Began: 8/20/2018
Finished:: 8/22/2018
| Genes | ||
|---|---|---|
| Initial | 27998 | |
| -13065 | Removed genes that didn’t occur in >17 cells (0.1% of population) | |
| Final | 14933 |
| Cells | ||
|---|---|---|
| Initial | 17823 | |
| -163 | Removed cells with nUMI <500 or >100000 | |
| -182 | Removed cells with nGene <500 or >3000 | |
| -82 | Removed cells with a mitochondrial percent >30% | |
| Final | 17396 |
dataset_loc <- "/Users/AEveritt/projects/scRNAseq_L5_Tbr1/raw_data/mm10/"
ids <- c("L5HET/", "L5NULL/","L5WT/")
d10x.data <- sapply(ids, function(i){
d10x <- Seurat::Read10X(file.path(dataset_loc,i))
colnames(d10x) <- paste(sapply(strsplit(colnames(d10x),split="-"),'[[',1L),i,sep="-")
d10x
})
experiment.data <- do.call("cbind", d10x.data)
experiment.aggregate <- CreateSeuratObject(
experiment.data,
project = "Siavash scRNA",
names.field = 2,
min.cells = 17,
names.delim = "\\-")
experiment.aggregate
## An object of class seurat in project Siavash scRNA
## 14933 genes across 17823 samples.
mito.genes <- grep("^mt-", rownames(experiment.aggregate@data), value = T)
percent.mito <- Matrix::colSums(experiment.aggregate@raw.data[mito.genes, ]) / Matrix::colSums(experiment.aggregate@raw.data)
experiment.aggregate <- AddMetaData(
object = experiment.aggregate,
metadata = percent.mito,
col.name= "percent.mito")
ribo.genes <- grep("^Rps|^Rpl|^Mrps|^Mrpl|^Gm", rownames(experiment.aggregate@data), value = T)
percent.ribo <- Matrix::colSums(experiment.aggregate@raw.data[ribo.genes, ]) / Matrix::colSums(experiment.aggregate@raw.data)
experiment.aggregate <- AddMetaData(
object = experiment.aggregate,
metadata = percent.ribo,
col.name= "percent.ribo")
pseudo.genes <- grep("^Gm", rownames(experiment.aggregate@data), value = T)
percent.pseudo <- Matrix::colSums(experiment.aggregate@raw.data[pseudo.genes, ]) / Matrix::colSums(experiment.aggregate@raw.data)
experiment.aggregate <- AddMetaData(
object = experiment.aggregate,
metadata = percent.pseudo,
col.name= "percent.pseudo")
p1a <- ggplot(experiment.aggregate@meta.data, aes(x=nUMI)) +
geom_histogram(binwidth=50, color="black", fill="white") + ggtitle("Distribution of nUMI") +
geom_vline(aes(xintercept=500),color="red") + geom_vline(aes(xintercept=10000),color="red") + theme_bw()
p1b <- VlnPlot(experiment.aggregate, "nUMI", do.return = TRUE) + geom_hline(yintercept = 10000, color= "red")
p1b <- p1b + ggtitle("Distribution of nUMI by genotype") + xlab("Genotype") + ylab("Count") + theme_bw() +
theme(plot.title = element_text(color="black", size=12),
axis.title.x = element_text(color="black", size=12),
axis.title.y = element_text(color="black", size=12),
legend.position = "none")
p2a <- ggplot(experiment.aggregate@meta.data, aes(x=nGene)) +
geom_histogram(binwidth=50, color="black", fill="white") + ggtitle("Distribution of nGene") +
geom_vline(aes(xintercept=500),color="red") + geom_vline(aes(xintercept=3000),color="red") + theme_bw()
p2b <- VlnPlot(experiment.aggregate, "nGene", do.return = TRUE) + geom_hline(yintercept = 3000, color= "red")
p2b <- p2b + ggtitle("Distribution of nGene by genotype") + xlab("Genotype") + ylab("Count") + theme_bw() +
theme(plot.title = element_text(color="black", size=12),
axis.title.x = element_text(color="black", size=12),
axis.title.y = element_text(color="black", size=12),
legend.position = "none")
p3a <- ggplot(experiment.aggregate@meta.data, aes(x=percent.mito)) +
geom_histogram(binwidth=0.01, color="black", fill="white") + ggtitle("Distribution of mitochondrial percentage") +
geom_vline(aes(xintercept=0.3),color="red") + theme_bw()
p3b <- VlnPlot(experiment.aggregate, "percent.mito", do.return = TRUE) + geom_hline(yintercept = 0.3, color= "red")
p3b <- p3b + ggtitle("Distribution of mito % by genotype") + xlab("Genotype") + ylab("Count") + theme_bw() +
theme(plot.title = element_text(color="black", size=12),
axis.title.x = element_text(color="black", size=12),
axis.title.y = element_text(color="black", size=12),
legend.position = "none")
grid.arrange(p1a,p2a, p3a, p1b, p2b, p3b, ncol=3)
experiment.aggregate <- FilterCells(object = experiment.aggregate, subset.names = "nUMI",
low.thresholds = 500 , high.thresholds = 10000)
experiment.aggregate <- FilterCells(object = experiment.aggregate, subset.names = "nGene",
low.thresholds = 500 , high.thresholds = 3000)
experiment.aggregate <- FilterCells(object = experiment.aggregate, subset.names = "percent.mito",
low.thresholds = -Inf , high.thresholds = 0.3)
#Save Data
save(experiment.aggregate, file=paste0(out_dir,"/experiment.aggregate.filtered.Rdata"))
#Clean up workspace
#detach("package:Seurat", unload=TRUE)
#detach("package:cowplot", unload=TRUE)
#gc()
#R.utils::gcDLLs()
core = SingleCellExperiment(assays = list(counts = as.matrix(experiment.aggregate@data)),
colData = as.data.frame(experiment.aggregate@meta.data),
rowData = rownames(as.matrix(experiment.aggregate@data)))
core <- calculateQCMetrics(core)
core <- normalize(core)
## Warning in .local(object, ...): using library sizes as size factors
scater::plotExprsFreqVsMean(core)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
pca_exp <- scater::runPCA(core, use_coldata = F, detect_outliers = TRUE)
plotReducedDim(pca_exp, use_dimred="PCA", colour_by="orig.ident")
summary(pca_exp$outlier)
## Length Class Mode
## 0 NULL NULL
pca_coldata <- scater::runPCA(core, use_coldata = T, detect_outliers = TRUE,
selected_variables = c("nGene", "pct_counts_in_top_500_features", "percent.mito","nUMI"))
summary(pca_coldata$outlier)
## Mode FALSE TRUE
## logical 16702 694
plotReducedDim(pca_coldata, use_dimred="PCA_coldata", colour_by="orig.ident")
plotReducedDim(pca_coldata, use_dimred="PCA_coldata", colour_by="outlier")
possible_outliers <- rownames(pca_coldata@colData[pca_coldata$outlier == T,])
save(possible_outliers, file=paste0(out_dir,"/possible_outliers.Rdata"))
#scater::plotExplanatoryVariables(core) #takes too long to plot for knitr and im impatient
knitr::include_graphics(paste0(out_dir,'/expl_plot.jpeg'))
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin17.7.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS/LAPACK: /usr/local/Cellar/openblas/0.3.6_1/lib/libopenblasp-r0.3.6.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] gridExtra_2.3 mvoutlier_2.0.9
## [3] sgeostat_1.0-27 scater_1.12.2
## [5] SingleCellExperiment_1.6.0 SummarizedExperiment_1.14.1
## [7] DelayedArray_0.10.0 BiocParallel_1.18.1
## [9] matrixStats_0.55.0 Biobase_2.44.0
## [11] GenomicRanges_1.36.1 GenomeInfoDb_1.20.0
## [13] IRanges_2.18.3 S4Vectors_0.22.1
## [15] BiocGenerics_0.30.0 Seurat_2.3.4
## [17] Matrix_1.2-17 cowplot_1.0.0
## [19] ggplot2_3.2.1 knitr_1.25
##
## loaded via a namespace (and not attached):
## [1] reticulate_1.13 R.utils_2.9.0
## [3] tidyselect_0.2.5 htmlwidgets_1.5.1
## [5] grid_3.6.0 ranger_0.11.2
## [7] Rtsne_0.15 munsell_0.5.0
## [9] codetools_0.2-16 ica_1.0-2
## [11] sROC_0.1-2 withr_2.1.2
## [13] colorspace_1.4-1 rstudioapi_0.10
## [15] ROCR_1.0-7 robustbase_0.93-5
## [17] dtw_1.21-3 vcd_1.4-4
## [19] VIM_4.8.0 gbRd_0.4-11
## [21] labeling_0.3 Rdpack_0.11-0
## [23] lars_1.2 GenomeInfoDbData_1.2.1
## [25] cvTools_0.3.2 bit64_0.9-7
## [27] vctrs_0.2.0 xfun_0.10
## [29] diptest_0.75-7 R6_2.4.0
## [31] ggbeeswarm_0.6.0 robCompositions_2.1.0
## [33] rsvd_1.0.2 hdf5r_1.3.0
## [35] flexmix_2.3-15 bitops_1.0-6
## [37] reshape_0.8.8 assertthat_0.2.1
## [39] SDMTools_1.1-221.1 scales_1.0.0
## [41] nnet_7.3-12 beeswarm_0.2.3
## [43] gtable_0.3.0 npsurv_0.4-0
## [45] rlang_0.4.0 zeallot_0.1.0
## [47] splines_3.6.0 lazyeval_0.2.2
## [49] acepack_1.4.1 checkmate_1.9.4
## [51] yaml_2.2.0 reshape2_1.4.3
## [53] abind_1.4-5 backports_1.1.5
## [55] Hmisc_4.2-0 tools_3.6.0
## [57] zCompositions_1.3.2-1 gplots_3.0.1.1
## [59] RColorBrewer_1.1-2 proxy_0.4-23
## [61] ggridges_0.5.1 Rcpp_1.0.2
## [63] plyr_1.8.4 base64enc_0.1-3
## [65] zlibbioc_1.30.0 purrr_0.3.2
## [67] RCurl_1.95-4.12 rpart_4.1-15
## [69] pbapply_1.4-2 viridis_0.5.1
## [71] zoo_1.8-6 haven_2.1.1
## [73] cluster_2.1.0 magrittr_1.5
## [75] data.table_1.12.4 openxlsx_4.1.0.1
## [77] lmtest_0.9-37 RANN_2.6.1
## [79] truncnorm_1.0-8 mvtnorm_1.0-11
## [81] fitdistrplus_1.0-14 hms_0.5.1
## [83] lsei_1.2-0 evaluate_0.14
## [85] rio_0.5.16 mclust_5.4.5
## [87] readxl_1.3.1 compiler_3.6.0
## [89] tibble_2.1.3 KernSmooth_2.23-15
## [91] crayon_1.3.4 R.oo_1.22.0
## [93] htmltools_0.4.0 mgcv_1.8-29
## [95] segmented_1.0-0 pcaPP_1.9-73
## [97] Formula_1.2-3 snow_0.4-3
## [99] tidyr_1.0.0 rrcov_1.4-7
## [101] MASS_7.3-51.4 fpc_2.2-3
## [103] boot_1.3-23 car_3.0-3
## [105] R.methodsS3_1.7.1 gdata_2.18.0
## [107] metap_1.1 igraph_1.2.4.1
## [109] forcats_0.4.0 pkgconfig_2.0.3
## [111] foreign_0.8-72 laeken_0.5.0
## [113] sp_1.3-1 foreach_1.4.7
## [115] vipor_0.4.5 XVector_0.24.0
## [117] bibtex_0.4.2 NADA_1.6-1
## [119] stringr_1.4.0 digest_0.6.21
## [121] pls_2.7-2 tsne_0.1-3
## [123] rmarkdown_1.16 cellranger_1.1.0
## [125] htmlTable_1.13.2 DelayedMatrixStats_1.6.1
## [127] curl_4.2 kernlab_0.9-27
## [129] gtools_3.8.1 modeltools_0.2-22
## [131] lifecycle_0.1.0 nlme_3.1-141
## [133] jsonlite_1.6 carData_3.0-2
## [135] BiocNeighbors_1.2.0 viridisLite_0.3.0
## [137] pillar_1.4.2 lattice_0.20-38
## [139] GGally_1.4.0 httr_1.4.1
## [141] DEoptimR_1.0-8 survival_2.44-1.1
## [143] glue_1.3.1 zip_2.0.4
## [145] png_0.1-7 prabclus_2.3-1
## [147] iterators_1.0.12 bit_1.1-14
## [149] class_7.3-15 stringi_1.4.3
## [151] mixtools_1.1.0 BiocSingular_1.0.0
## [153] doSNOW_1.0.18 latticeExtra_0.6-28
## [155] caTools_1.17.1.2 dplyr_0.8.3
## [157] irlba_2.3.3 e1071_1.7-2
## [159] ape_5.3